setwd("~/HOT_R_working_directory")
save.image(file = "HOT_anal-4Jul.RData")
load("~/HOT_R_working_directory/HOT_anal-4Jul.RData")

# This will force R to give the whole digits, not scientific notation
options("scipen"=100, "digits"=4)

library(edgeR)
## Loading required package: limma
library(reshape2)
library(ggplot2)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
library(plyr)
library(UpSetR)
## 
## Attaching package: 'UpSetR'
## The following object is masked from 'package:lattice':
## 
##     histogram
########## Supplimental figure 1 : CTD Data
Data = read.csv("CTD_ALL_averages.csv", header = T)
head(Data)
##   Depth Temp_268 Temp_271 Temp_275 Temp_279 NO3_268 NO3_271 NO3_275 NO3_279
## 1     0    24.44    24.13    27.09    26.01   4.933  1.4044   2.514   4.276
## 2     2    24.64    24.22    27.10    26.01   4.627  1.2927   2.506   4.186
## 3     4    24.64    24.22    27.11    26.00   4.294  0.8582   2.618   3.941
## 4     6    24.64    24.22    27.11    26.01   4.107  0.7019   2.243   3.865
## 5     8    24.64    24.22    27.11    26.01   4.338  0.7934   2.270   3.694
## 6    10    24.64    24.22    27.10    26.01   4.049  0.8162   2.244   3.825
##   O2_268 O2_271 O2_275 O2_279 Chl_268 Chl_271 Chl_275 Chl_279 Sal_268 Sal_271
## 1  208.2  210.0  201.0  202.5 0.08154  0.1210  0.0007  0.1571   35.33   35.14
## 2  208.2  209.9  201.0  202.5 0.19695  0.1477  0.0007  0.1571   35.33   35.14
## 3  208.2  210.2  201.0  202.8 0.24758  0.1063  0.0698  0.1667   35.33   35.14
## 4  208.3  210.2  200.9  202.7 0.25912  0.1134  0.0751  0.1778   35.33   35.13
## 5  208.0  210.0  201.2  202.7 0.25446  0.1131  0.0735  0.1762   35.33   35.13
## 6  208.1  210.1  201.3  202.7 0.26717  0.1032  0.0754  0.1650   35.33   35.13
##   Sal_275 Sal_279
## 1   35.06      35
## 2   35.06      35
## 3   35.06      35
## 4   35.06      35
## 5   35.06      35
## 6   35.06      35
#qplot(Data$DBAR, Data$Temp_avg, geom = "line", )
Temps = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = Temp_268, colour = "Temp_268")) + geom_line(aes(y = Temp_271, colour = "Temp_271")) + geom_line(aes(y = Temp_275, colour = "Temp_275")) + geom_line(aes(y = Temp_279, colour = "Temp_279")) + labs(y = "Temperature (ËšC)", x = "Depth (m)")
Oxygen = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = O2_268, colour = "O2_268")) + geom_line(aes(y = O2_271, colour = "O2_271")) + geom_line(aes(y = O2_275, colour = "O2_275")) + geom_line(aes(y = O2_279, colour = "O2_279")) + labs(y = "Oxygen (uM/L)", x = "Depth (m)")
Chlphyl = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = Chl_268, colour = "Chl_268")) + geom_line(aes(y = Chl_271, colour = "Chl_271")) + geom_line(aes(y = Chl_275, colour = "Chl_275")) + geom_line(aes(y = Chl_279, colour = "Chl_279")) + labs(y = "Chl-a (ug/L)", x = "Depth (m)")
Salin = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = Sal_268, colour = "Sal_268")) + geom_line(aes(y = Sal_271, colour = "Sal_271")) + geom_line(aes(y = Sal_275, colour = "Sal_275")) + geom_line(aes(y = Sal_279, colour = "Sal_279")) + labs(y = "Salinity (pss)", x = "Depth (m)")
Nitrate = ggplot(Data, aes(x = Depth)) + geom_line(aes(y = NO3_268, colour = "NO3_268")) + geom_line(aes(y = NO3_271, colour = "NO3_271")) + geom_line(aes(y = NO3_275, colour = "NO3_275")) + geom_line(aes(y = NO3_279, colour = "NO3_279")) + labs(y = "Nitrate (uM/L)", x = "Depth (m)")



#Import ASV table
ASV_table<-read.table("Raw_table_for_manu.txt",header=TRUE, comment.char = "")
Header = c("ASV.ID","5m.Dec2014","25m.Dec2014","45m.Dec2014","75m.Dec2014","100m.Dec2014","125m.Dec2014","150m.Dec2014","175m.Dec2014","300m.Dec2014","400m.Dec2014","500m.Dec2014","770m.Dec2014","5m.Apr","25m.Apr","75m.Apr","100m.Apr","125m.Apr","150m.Apr","175m.Apr","300m.Apr","400m.Apr","500m.Apr","770m.Apr","5m.Aug","25m.Aug","45m.Aug","75m.Aug","100m.Aug","125m.Aug","150m.Aug","175m.Aug","300m.Aug","400m.Aug","500m.Aug","770m.Aug","5m.Dec2015","25m.Dec2015","45m.Dec2015","75m.Dec2015","100m.Dec2015","125m.Dec2015","150m.Dec2015","175m.Dec2015","300m.Dec2015","400m.Dec2015","500m.Dec2015","770m.Dec2015","Taxonomy")
colnames(ASV_table) = Header #Swap the current header for the custom header for columnames
rownames(ASV_table) = ASV_table$ASV.ID #Here I want to give the rownames the ASV IDs
#dim(ASV_table) #Check the dimensions. There are 11538rows and 49 columns

#Calculate the total number of sequence reads
Global_Seq_total = sum(colSums(ASV_table[,2:48]))
#there are 2,598,281 total reads in the dataset

##Filter out OTUs with only 1 sequence in the whole dataset (global singletons)
ASV_total<-apply(ASV_table[2:48],1,sum) #here we are summing each row in the dataset, where each row represents an ASV and its abundances across the different samples (columns)

#apply(ASV_table[2:48], 2, sum)
ASV_table.no1 = ASV_table[ ASV_total>1, ]  #Here we are retaining only ASVs that have more than one sequence
removed = dim(ASV_table)[1] - dim(ASV_table.no1)[1] #The number of singleton ASVs lost in the previous step
# 343 singleton ASVs were removed

########### Sequence Analysis and plots ############
Data_cols = ASV_table.no1[,2:48]
sequence_total<-apply(Data_cols,2,sum) #number of sequences per sample(column)
ASV_count<-colSums(Data_cols>0) #total number of OTUs
ASV_singleton<-colSums(Data_cols==1) #Number of singleton OTUs
ASV_doubleton<-colSums(Data_cols==2) #Number of doubleton OTUs
ASV_else<-colSums(Data_cols>2) #Number of OTUs with more than 2 sequences

#dataframe with OTU stats per sample
ASVsample_info<-data.frame(sequence_total,ASV_count,ASV_singleton,ASV_doubleton,ASV_else)# Compile
#Plot it
ASVsample_info$samples<-row.names(ASVsample_info)
ASVsample_info$samples = factor(ASVsample_info$samples, levels = ASVsample_info$samples) ##***** this trick is to keep the order on the X axis
Melted<-melt(ASVsample_info) #Melt to long format part of the reshape package
## Using samples as id variables
#head(Melted)

Melted$Figure<-"Sequences"
Melted$Figure[Melted$variable == "ASV_count"]<-"Total ASVs"
ASVdist<-c("ASV_singleton", "ASV_doubleton", "ASV_else")
Melted$Figure[Melted$variable %in% ASVdist]<-"Breakdown of ASVs"
Melted
##          samples       variable  value            Figure
## 1     5m.Dec2014 sequence_total  86504         Sequences
## 2    25m.Dec2014 sequence_total  64988         Sequences
## 3    45m.Dec2014 sequence_total  55678         Sequences
## 4    75m.Dec2014 sequence_total  87424         Sequences
## 5   100m.Dec2014 sequence_total  53931         Sequences
## 6   125m.Dec2014 sequence_total  50649         Sequences
## 7   150m.Dec2014 sequence_total  68535         Sequences
## 8   175m.Dec2014 sequence_total  54520         Sequences
## 9   300m.Dec2014 sequence_total 132084         Sequences
## 10  400m.Dec2014 sequence_total  80535         Sequences
## 11  500m.Dec2014 sequence_total  78374         Sequences
## 12  770m.Dec2014 sequence_total 101613         Sequences
## 13        5m.Apr sequence_total  57199         Sequences
## 14       25m.Apr sequence_total  62103         Sequences
## 15       75m.Apr sequence_total  48195         Sequences
## 16      100m.Apr sequence_total  50921         Sequences
## 17      125m.Apr sequence_total  55271         Sequences
## 18      150m.Apr sequence_total  53244         Sequences
## 19      175m.Apr sequence_total  43776         Sequences
## 20      300m.Apr sequence_total  44903         Sequences
## 21      400m.Apr sequence_total  85980         Sequences
## 22      500m.Apr sequence_total  74359         Sequences
## 23      770m.Apr sequence_total  79572         Sequences
## 24        5m.Aug sequence_total  97523         Sequences
## 25       25m.Aug sequence_total  83069         Sequences
## 26       45m.Aug sequence_total  76894         Sequences
## 27       75m.Aug sequence_total 104836         Sequences
## 28      100m.Aug sequence_total  50420         Sequences
## 29      125m.Aug sequence_total  24246         Sequences
## 30      150m.Aug sequence_total  30023         Sequences
## 31      175m.Aug sequence_total  32852         Sequences
## 32      300m.Aug sequence_total  26720         Sequences
## 33      400m.Aug sequence_total  33464         Sequences
## 34      500m.Aug sequence_total  29631         Sequences
## 35      770m.Aug sequence_total  32917         Sequences
## 36    5m.Dec2015 sequence_total  44278         Sequences
## 37   25m.Dec2015 sequence_total  60138         Sequences
## 38   45m.Dec2015 sequence_total  33859         Sequences
## 39   75m.Dec2015 sequence_total  32924         Sequences
## 40  100m.Dec2015 sequence_total  31289         Sequences
## 41  125m.Dec2015 sequence_total  23897         Sequences
## 42  150m.Dec2015 sequence_total  27516         Sequences
## 43  175m.Dec2015 sequence_total  29800         Sequences
## 44  300m.Dec2015 sequence_total  30907         Sequences
## 45  400m.Dec2015 sequence_total  23215         Sequences
## 46  500m.Dec2015 sequence_total  24524         Sequences
## 47  770m.Dec2015 sequence_total  42638         Sequences
## 48    5m.Dec2014      ASV_count    768        Total ASVs
## 49   25m.Dec2014      ASV_count    682        Total ASVs
## 50   45m.Dec2014      ASV_count    608        Total ASVs
## 51   75m.Dec2014      ASV_count    780        Total ASVs
## 52  100m.Dec2014      ASV_count    746        Total ASVs
## 53  125m.Dec2014      ASV_count    658        Total ASVs
## 54  150m.Dec2014      ASV_count    817        Total ASVs
## 55  175m.Dec2014      ASV_count    552        Total ASVs
## 56  300m.Dec2014      ASV_count   1173        Total ASVs
## 57  400m.Dec2014      ASV_count    935        Total ASVs
## 58  500m.Dec2014      ASV_count    817        Total ASVs
## 59  770m.Dec2014      ASV_count    878        Total ASVs
## 60        5m.Apr      ASV_count    648        Total ASVs
## 61       25m.Apr      ASV_count    665        Total ASVs
## 62       75m.Apr      ASV_count    747        Total ASVs
## 63      100m.Apr      ASV_count    848        Total ASVs
## 64      125m.Apr      ASV_count    727        Total ASVs
## 65      150m.Apr      ASV_count    768        Total ASVs
## 66      175m.Apr      ASV_count    686        Total ASVs
## 67      300m.Apr      ASV_count    744        Total ASVs
## 68      400m.Apr      ASV_count    977        Total ASVs
## 69      500m.Apr      ASV_count   1045        Total ASVs
## 70      770m.Apr      ASV_count    817        Total ASVs
## 71        5m.Aug      ASV_count    787        Total ASVs
## 72       25m.Aug      ASV_count    907        Total ASVs
## 73       45m.Aug      ASV_count    904        Total ASVs
## 74       75m.Aug      ASV_count    818        Total ASVs
## 75      100m.Aug      ASV_count    713        Total ASVs
## 76      125m.Aug      ASV_count    523        Total ASVs
## 77      150m.Aug      ASV_count    636        Total ASVs
## 78      175m.Aug      ASV_count    734        Total ASVs
## 79      300m.Aug      ASV_count    584        Total ASVs
## 80      400m.Aug      ASV_count    697        Total ASVs
## 81      500m.Aug      ASV_count    668        Total ASVs
## 82      770m.Aug      ASV_count    618        Total ASVs
## 83    5m.Dec2015      ASV_count    639        Total ASVs
## 84   25m.Dec2015      ASV_count    800        Total ASVs
## 85   45m.Dec2015      ASV_count    647        Total ASVs
## 86   75m.Dec2015      ASV_count    702        Total ASVs
## 87  100m.Dec2015      ASV_count    542        Total ASVs
## 88  125m.Dec2015      ASV_count    469        Total ASVs
## 89  150m.Dec2015      ASV_count    560        Total ASVs
## 90  175m.Dec2015      ASV_count    593        Total ASVs
## 91  300m.Dec2015      ASV_count    658        Total ASVs
## 92  400m.Dec2015      ASV_count    554        Total ASVs
## 93  500m.Dec2015      ASV_count    480        Total ASVs
## 94  770m.Dec2015      ASV_count    665        Total ASVs
## 95    5m.Dec2014  ASV_singleton      2 Breakdown of ASVs
## 96   25m.Dec2014  ASV_singleton      0 Breakdown of ASVs
## 97   45m.Dec2014  ASV_singleton      0 Breakdown of ASVs
## 98   75m.Dec2014  ASV_singleton      0 Breakdown of ASVs
## 99  100m.Dec2014  ASV_singleton      2 Breakdown of ASVs
## 100 125m.Dec2014  ASV_singleton      3 Breakdown of ASVs
## 101 150m.Dec2014  ASV_singleton      6 Breakdown of ASVs
## 102 175m.Dec2014  ASV_singleton      1 Breakdown of ASVs
## 103 300m.Dec2014  ASV_singleton      3 Breakdown of ASVs
## 104 400m.Dec2014  ASV_singleton      1 Breakdown of ASVs
## 105 500m.Dec2014  ASV_singleton      2 Breakdown of ASVs
## 106 770m.Dec2014  ASV_singleton      2 Breakdown of ASVs
## 107       5m.Apr  ASV_singleton      0 Breakdown of ASVs
## 108      25m.Apr  ASV_singleton      0 Breakdown of ASVs
## 109      75m.Apr  ASV_singleton      2 Breakdown of ASVs
## 110     100m.Apr  ASV_singleton      1 Breakdown of ASVs
## 111     125m.Apr  ASV_singleton      1 Breakdown of ASVs
## 112     150m.Apr  ASV_singleton      4 Breakdown of ASVs
## 113     175m.Apr  ASV_singleton      2 Breakdown of ASVs
## 114     300m.Apr  ASV_singleton      0 Breakdown of ASVs
## 115     400m.Apr  ASV_singleton      2 Breakdown of ASVs
## 116     500m.Apr  ASV_singleton      2 Breakdown of ASVs
## 117     770m.Apr  ASV_singleton      0 Breakdown of ASVs
## 118       5m.Aug  ASV_singleton      0 Breakdown of ASVs
## 119      25m.Aug  ASV_singleton      1 Breakdown of ASVs
## 120      45m.Aug  ASV_singleton      0 Breakdown of ASVs
## 121      75m.Aug  ASV_singleton      1 Breakdown of ASVs
## 122     100m.Aug  ASV_singleton      1 Breakdown of ASVs
## 123     125m.Aug  ASV_singleton      1 Breakdown of ASVs
## 124     150m.Aug  ASV_singleton      0 Breakdown of ASVs
## 125     175m.Aug  ASV_singleton      2 Breakdown of ASVs
## 126     300m.Aug  ASV_singleton      0 Breakdown of ASVs
## 127     400m.Aug  ASV_singleton      1 Breakdown of ASVs
## 128     500m.Aug  ASV_singleton      1 Breakdown of ASVs
## 129     770m.Aug  ASV_singleton      0 Breakdown of ASVs
## 130   5m.Dec2015  ASV_singleton      0 Breakdown of ASVs
## 131  25m.Dec2015  ASV_singleton      0 Breakdown of ASVs
## 132  45m.Dec2015  ASV_singleton      1 Breakdown of ASVs
## 133  75m.Dec2015  ASV_singleton      1 Breakdown of ASVs
## 134 100m.Dec2015  ASV_singleton      2 Breakdown of ASVs
## 135 125m.Dec2015  ASV_singleton      4 Breakdown of ASVs
## 136 150m.Dec2015  ASV_singleton      3 Breakdown of ASVs
## 137 175m.Dec2015  ASV_singleton      2 Breakdown of ASVs
## 138 300m.Dec2015  ASV_singleton      2 Breakdown of ASVs
## 139 400m.Dec2015  ASV_singleton      1 Breakdown of ASVs
## 140 500m.Dec2015  ASV_singleton      0 Breakdown of ASVs
## 141 770m.Dec2015  ASV_singleton      1 Breakdown of ASVs
## 142   5m.Dec2014  ASV_doubleton     12 Breakdown of ASVs
## 143  25m.Dec2014  ASV_doubleton     13 Breakdown of ASVs
## 144  45m.Dec2014  ASV_doubleton      6 Breakdown of ASVs
## 145  75m.Dec2014  ASV_doubleton     10 Breakdown of ASVs
## 146 100m.Dec2014  ASV_doubleton     16 Breakdown of ASVs
## 147 125m.Dec2014  ASV_doubleton     10 Breakdown of ASVs
## 148 150m.Dec2014  ASV_doubleton     14 Breakdown of ASVs
## 149 175m.Dec2014  ASV_doubleton      9 Breakdown of ASVs
## 150 300m.Dec2014  ASV_doubleton      5 Breakdown of ASVs
## 151 400m.Dec2014  ASV_doubleton     19 Breakdown of ASVs
## 152 500m.Dec2014  ASV_doubleton     12 Breakdown of ASVs
## 153 770m.Dec2014  ASV_doubleton      8 Breakdown of ASVs
## 154       5m.Apr  ASV_doubleton     17 Breakdown of ASVs
## 155      25m.Apr  ASV_doubleton     19 Breakdown of ASVs
## 156      75m.Apr  ASV_doubleton     32 Breakdown of ASVs
## 157     100m.Apr  ASV_doubleton     46 Breakdown of ASVs
## 158     125m.Apr  ASV_doubleton     26 Breakdown of ASVs
## 159     150m.Apr  ASV_doubleton     34 Breakdown of ASVs
## 160     175m.Apr  ASV_doubleton     25 Breakdown of ASVs
## 161     300m.Apr  ASV_doubleton     31 Breakdown of ASVs
## 162     400m.Apr  ASV_doubleton     37 Breakdown of ASVs
## 163     500m.Apr  ASV_doubleton     40 Breakdown of ASVs
## 164     770m.Apr  ASV_doubleton     30 Breakdown of ASVs
## 165       5m.Aug  ASV_doubleton      1 Breakdown of ASVs
## 166      25m.Aug  ASV_doubleton      4 Breakdown of ASVs
## 167      45m.Aug  ASV_doubleton      5 Breakdown of ASVs
## 168      75m.Aug  ASV_doubleton      1 Breakdown of ASVs
## 169     100m.Aug  ASV_doubleton     26 Breakdown of ASVs
## 170     125m.Aug  ASV_doubleton     22 Breakdown of ASVs
## 171     150m.Aug  ASV_doubleton     24 Breakdown of ASVs
## 172     175m.Aug  ASV_doubleton     19 Breakdown of ASVs
## 173     300m.Aug  ASV_doubleton     11 Breakdown of ASVs
## 174     400m.Aug  ASV_doubleton     19 Breakdown of ASVs
## 175     500m.Aug  ASV_doubleton     15 Breakdown of ASVs
## 176     770m.Aug  ASV_doubleton      6 Breakdown of ASVs
## 177   5m.Dec2015  ASV_doubleton      0 Breakdown of ASVs
## 178  25m.Dec2015  ASV_doubleton      9 Breakdown of ASVs
## 179  45m.Dec2015  ASV_doubleton     19 Breakdown of ASVs
## 180  75m.Dec2015  ASV_doubleton     28 Breakdown of ASVs
## 181 100m.Dec2015  ASV_doubleton     22 Breakdown of ASVs
## 182 125m.Dec2015  ASV_doubleton     15 Breakdown of ASVs
## 183 150m.Dec2015  ASV_doubleton     11 Breakdown of ASVs
## 184 175m.Dec2015  ASV_doubleton     21 Breakdown of ASVs
## 185 300m.Dec2015  ASV_doubleton     13 Breakdown of ASVs
## 186 400m.Dec2015  ASV_doubleton     16 Breakdown of ASVs
## 187 500m.Dec2015  ASV_doubleton      9 Breakdown of ASVs
## 188 770m.Dec2015  ASV_doubleton     14 Breakdown of ASVs
## 189   5m.Dec2014       ASV_else    754 Breakdown of ASVs
## 190  25m.Dec2014       ASV_else    669 Breakdown of ASVs
## 191  45m.Dec2014       ASV_else    602 Breakdown of ASVs
## 192  75m.Dec2014       ASV_else    770 Breakdown of ASVs
## 193 100m.Dec2014       ASV_else    728 Breakdown of ASVs
## 194 125m.Dec2014       ASV_else    645 Breakdown of ASVs
## 195 150m.Dec2014       ASV_else    797 Breakdown of ASVs
## 196 175m.Dec2014       ASV_else    542 Breakdown of ASVs
## 197 300m.Dec2014       ASV_else   1165 Breakdown of ASVs
## 198 400m.Dec2014       ASV_else    915 Breakdown of ASVs
## 199 500m.Dec2014       ASV_else    803 Breakdown of ASVs
## 200 770m.Dec2014       ASV_else    868 Breakdown of ASVs
## 201       5m.Apr       ASV_else    631 Breakdown of ASVs
## 202      25m.Apr       ASV_else    646 Breakdown of ASVs
## 203      75m.Apr       ASV_else    713 Breakdown of ASVs
## 204     100m.Apr       ASV_else    801 Breakdown of ASVs
## 205     125m.Apr       ASV_else    700 Breakdown of ASVs
## 206     150m.Apr       ASV_else    730 Breakdown of ASVs
## 207     175m.Apr       ASV_else    659 Breakdown of ASVs
## 208     300m.Apr       ASV_else    713 Breakdown of ASVs
## 209     400m.Apr       ASV_else    938 Breakdown of ASVs
## 210     500m.Apr       ASV_else   1003 Breakdown of ASVs
## 211     770m.Apr       ASV_else    787 Breakdown of ASVs
## 212       5m.Aug       ASV_else    786 Breakdown of ASVs
## 213      25m.Aug       ASV_else    902 Breakdown of ASVs
## 214      45m.Aug       ASV_else    899 Breakdown of ASVs
## 215      75m.Aug       ASV_else    816 Breakdown of ASVs
## 216     100m.Aug       ASV_else    686 Breakdown of ASVs
## 217     125m.Aug       ASV_else    500 Breakdown of ASVs
## 218     150m.Aug       ASV_else    612 Breakdown of ASVs
## 219     175m.Aug       ASV_else    713 Breakdown of ASVs
## 220     300m.Aug       ASV_else    573 Breakdown of ASVs
## 221     400m.Aug       ASV_else    677 Breakdown of ASVs
## 222     500m.Aug       ASV_else    652 Breakdown of ASVs
## 223     770m.Aug       ASV_else    612 Breakdown of ASVs
## 224   5m.Dec2015       ASV_else    639 Breakdown of ASVs
## 225  25m.Dec2015       ASV_else    791 Breakdown of ASVs
## 226  45m.Dec2015       ASV_else    627 Breakdown of ASVs
## 227  75m.Dec2015       ASV_else    673 Breakdown of ASVs
## 228 100m.Dec2015       ASV_else    518 Breakdown of ASVs
## 229 125m.Dec2015       ASV_else    450 Breakdown of ASVs
## 230 150m.Dec2015       ASV_else    546 Breakdown of ASVs
## 231 175m.Dec2015       ASV_else    570 Breakdown of ASVs
## 232 300m.Dec2015       ASV_else    643 Breakdown of ASVs
## 233 400m.Dec2015       ASV_else    537 Breakdown of ASVs
## 234 500m.Dec2015       ASV_else    471 Breakdown of ASVs
## 235 770m.Dec2015       ASV_else    650 Breakdown of ASVs
#Basic bar plot
ASVbar_statistics <- ggplot(Melted, aes(x=samples, y=value, fill=variable))+geom_bar(stat="identity",position="stack",color="black") +theme_bw()+theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5,size=8),axis.text.y=element_text(size=12),legend.position = "top")

ASVbar_statistics

ASVbar_statistics %+% subset(Melted, Figure %in% "Breakdown of ASVs")+labs(title="Distribution of ASVs",x="Samples", y="Total ASVs")+scale_fill_manual(values=c("#e41a1c","#fee08b","#4393c3"))

ASVbar_statistics %+% subset(Melted, Figure %in% "Total ASVs")+labs(title="Total number of ASVs",x="Samples", y="Total ASVs")+scale_fill_manual(values=c("darkblue"))

ASVbar_statistics %+% subset(Melted, Figure %in% "Sequences")+labs(title="Total number of sequences",x="Samples", y="Total sequences")+scale_fill_manual(values=c("purple"))

############ Normalize using edgeR #############
#apply(ASV_table.no1[2:48],2,sum)
ListDGE = DGEList(Data_cols)
#ListDGE
ListDGE = calcNormFactors(ListDGE, method = "TMM")
#ListDGE
TMMNorm_ASV_DataCols = cpm(ListDGE)
#head(TMMNorm_ASV_DataCols)
TMMNorm_ASV_table = as.data.frame(TMMNorm_ASV_DataCols)
TMMNorm_ASV_table$ASV.ID = row.names(TMMNorm_ASV_table)
Joined<-join(TMMNorm_ASV_table, ASV_table.no1[c(1,49)], by="ASV.ID", type="left", match="first")

pr2_rename_taxa<-function(df){
  split<-colsplit(df$Taxon, ";", c("Level1","Level2","Level3","Level4","Level5","Level6", "Level7","Level8","Level9", "Level10", "Level11", "Level12"))
  split[ is.na(split) ] = "XXX"
  split[ split == "" ] = "XXX"
  split$Taxa<-"Other/unknown"
  split$Taxa[split$Level1 == "No blast hit"]="No blast hit"
  split$Taxa[split$Level1 == "Unassigned"]="Unassigned"
  split$Taxa[split$Level1 == "None"]="None"
  split$Taxa[split$Level2=="Amoebozoa"]="Amoebozoa"
  split$Taxa[split$Level2=="Apusozoa"]="Other/unknown"
  split$Taxa[split$Level2=="Eukaryota"]="Other/unknown"
  split$Taxa[split$Level2=="Stramenopiles"]="Stramenopiles-Other"
  split$Taxa[split$Level2=="Alveolata"]="Alveolates-Other"
  split$Taxa[split$Level2=="Opisthokonta"]="Opisthokonts-Other"
  split$Taxa[split$Level2=="Archaeplastida"]="Archaeplastids"
  split$Taxa[split$Level2=="Excavata"]="Excavates"
  split$Taxa[split$Level2=="Rhizaria"]="Rhizaria-Other"
  split$Taxa[split$Level2=="Hacrobia"]="Hacrobia-other"
  #  split$Taxa[split$Level3=="Telonemia"]="Hacrobia-Telonemia"
  split$Taxa[split$Level3=="Haptophyta"]="Hacrobia-Haptophytes"
  split$Taxa[split$Level3=="Fungi"]="Opisthokont-Fungi"
  split$Taxa[split$Level3=="Metazoa"]="Opisthokont-Metazoa"
  split$Taxa[split$Level3=="Foraminifera"]="Rhizaria-Foraminifera"
  split$Taxa[split$Level3=="Dinophyta"]="Dinoflagellates-other"
  split$Taxa[split$Level4=="Syndiniales"]="Alveolates-Syndiniales"
  split$Taxa[split$Level3=="Cryptophyta"]="Cryptophytes"
  split$Taxa[split$Level3=="Ciliophora"]="Alveolates-Ciliates"
  #  split$Taxa[split$Level3=="Chlorophyta"]="Archaeplastids-Chlorophytes"
  split$Taxa[split$Level3=="Cercozoa"]="Rhizaria-Cercozoa"
  split$Taxa[split$Level3=="Radiolaria"]="Rhizaria-Radiolaria"
  split$Taxa[split$Level3=="Ochrophyta"]="Stramenopiles-Ochrophyta"
  split$Taxa[split$Level3=="Choanoflagellida"]="Opisthokont-Choanoflagellida"
  #  split$Taxa[split$Level3=="Katablepharidophyta"]="Hacrobia-Katablepharidophyta"
  #  split$Taxa[split$Level3=="Picozoa"]="Hacrobia-Picozoa"
  #  split$Taxa[split$Level4=="Acantharea"]="Radiolaria-Acantharia"
  #  split$Taxa[split$Level4=="Chrysophyceae-Synurophyceae"]="Stramenopiles-Chrysophytes"
  split$Taxa[split$Level4=="Pelagophyceae"]="Stramenopiles-Pelagophytes"
  split$Taxa[split$Level4=="Bacillariophyta"]="Stramenopiles-Diatoms"
  split$Taxa[split$Level4=="MAST"]="Stramenopiles-MAST"
  # split$Taxa[split$Level4=="Polycystinea"]="Rhizaria-Polycystines" #Leave out of the main bar plot. its supposed to be an overview.
  split$Taxa[split$Level4=="Dinophyceae"]="Alveolates-Dinophyceae"
  #  split$Taxa[split$Level4=="Labyrinthulea"]="Stramenopiles-Labyrinthulea" #Only include in the bubble plot or whatever will be high resolution. These parasites are important because they show up in the deep.
  #  split$Taxa[split$Level4=="Prymnesiophyceae"]="Haptophyta-Prymnesiophyceae" #Leave this one out because there is nothing more to gain by pulling it out other than just saying there is micro diversity
  #  split$Taxa[split$Level4=="Chrysophyceae"]="Stramenopiles-Chrysophyceae"
  #  split$Taxa[split$Level4=="Dictyochophyceae"]="Stramenopiles-Dictyochophyceae"
  #  split$Taxa[split$Level4=="Spirotrichea"]="Spirotrichea"
  #  split$Taxa[split$Level4=="Oligohymenophorea"]="Oligohymenophorea"
  #  split$Taxa[split$Level4=="Litostomatea"]="Litostomatea"
  #  split$Taxa[split$Level4=="Phyllopharyngea"]="Phyllopharyngea"
  #  split$Taxa[split$Level6=="Pedinellales"]="Stramenopiles-Pedinellales"
  # split$Taxa[split$Level6=="Pterocoryothoidea"]="Radiolaria-Pterocoryothoidea" #Overlap with other radiolarians. If it comes down to it, i can show how much acantharia is there, but with so few radiolarians in the database it may not be worth listing these two, and a large 'other'
  #  split$Taxa[split$Level7=="Gymnodinium"]="Dinophyceae-Gymnodinium"
  #  split$Taxa[split$Level7=="Prorocentrum"]="Dinophyceae-Prorocentrum"
  #  split$Taxa[split$Level7=="Gyrodinium"]="Dinophyceae-Gyrodinium" #Seems redundant. If I find information supporting that they are distinct from gymnodinium...
  #  split$Taxa[split$Level7=="Neoceratium"]="Dinophyceae-Neoceratium"
  #  split$Taxa[split$Level7=="Protoperidinium"]="Dinophyceae-Protoperidinium"
  return(split)
} 

NT = pr2_rename_taxa(Joined)
DBin = data.frame(Joined, NT)
cnames = c("ASV.ID","5m Dec2014","25m Dec2014","45m Dec2014","75m Dec2014","100m Dec2014","125m Dec2014","155m Dec2014","175m Dec2014","300m Dec2014","400m Dec2014","500m Dec2014","770m Dec2014","5m Apr2015","25m Apr2015","75m Apr2015","100m Apr2015","125m Apr2015","150m Apr2015","175m Apr2015","300m Apr2015","400m Apr2015","500m Apr2015","770m Apr2015","5m Aug2015","25m Aug2015","45m Aug2015","75m Aug2015","100m Aug2015","125m Aug2015","150m Aug2015","175m Aug2015","300m Aug2015","400m Aug2015","500m Aug2015","770m Aug2015","5m Dec2015","25m Dec2015","45m Dec2015","75m Dec2015","100m Dec2015","125m Dec2015","150m Dec2015","175m Dec2015","300m Dec2015","400m Dec2015","500m Dec2015","770m Dec2015","Taxonomy","Level1","Level2","Level3","Level4","Level5","Level6","Level7","Level8","Level9","Level10","Level11","Level12","Taxa")
colnames(DBin) = cnames

#Keep in mind that there are a bunch of ASVs in Metazoa
#DBin = DBin[DBin$Taxa != "Stramenopiles-Pelagophytes",]

#write.csv(DBin, file = "HOT_for_Krona.csv") # output table for constructing Krona plots

################# Figures S4 & 2a Whole Dataset Ordination plots (NMDS & Cluster Dendrogram)
DBin_nums_only = DBin[,2:48]
DBin_relabund = decostand(DBin_nums_only, MARGIN = 2, method = "total") #convert to relative abundances
DBin_relabund.t = t(DBin_relabund) #convert to wide format

cluster<-hclust(dist(DBin_relabund.t), method="average") #Hierarchial clustering of analysis using euclidean distances 
FigureS4 = plot(cluster)

######### Figure 2a
# Calculate MDS with Bray-Curtis dissimilarity matrix
NMDS.bray=metaMDS(DBin_relabund.t,distance="bray",k=2)
## Run 0 stress 0.0659 
## Run 1 stress 0.0659 
## ... New best solution
## ... Procrustes: rmse 0.000192  max resid 0.0004533 
## ... Similar to previous best
## Run 2 stress 0.07009 
## Run 3 stress 0.06918 
## Run 4 stress 0.07009 
## Run 5 stress 0.07119 
## Run 6 stress 0.07684 
## Run 7 stress 0.07119 
## Run 8 stress 0.07823 
## Run 9 stress 0.07008 
## Run 10 stress 0.07397 
## Run 11 stress 0.06674 
## Run 12 stress 0.06918 
## Run 13 stress 0.06918 
## Run 14 stress 0.07247 
## Run 15 stress 0.07141 
## Run 16 stress 0.07009 
## Run 17 stress 0.07009 
## Run 18 stress 0.0659 
## ... New best solution
## ... Procrustes: rmse 0.000154  max resid 0.0004159 
## ... Similar to previous best
## Run 19 stress 0.06943 
## Run 20 stress 0.07084 
## *** Solution reached
# Plot extracted MDS points from each sample using ggplot2
NMDS.bray_pts<-NMDS.bray$points[1:nrow(NMDS.bray$points),]
NMDS.bray_pts<-as.data.frame(NMDS.bray_pts)
NMDS.bray_pts$Sample<-row.names(NMDS.bray_pts)
NMDS.bray_pts$Sample=factor(NMDS.bray_pts$Sample, levels = NMDS.bray_pts$Sample)
NMDS.bray_plot <- ggplot(NMDS.bray_pts, aes(x = MDS1, y = MDS2, shape=Sample, fill=Sample), color="black") + 
  geom_point(size=3,aes(fill=Sample)) + 
  theme_bw() +
  scale_shape_manual(values = c(21,21,21,21,21,21,21,21,21,21,21,21,24,24,24,24,24,24,24,24,24,24,24,23,23,23,23,23,23,23,23,23,23,23,23,22,22,22,22,22,22,22,22,22,22,22,22,22,22))+scale_fill_manual(values=c("#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928","#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c", "#fdbf6f", "#ff7f00","#cab2d6","#6a3d9a", "#ffff99", "#b15928"))
NMDS.bray_plot

#### Table1: One-way-ANOVA of MDS 1&2

#This one is a cool lick that matt wrote, and probably uses commonly to isolate pieces of larger data strings.
#sapply takes a variable, X, and then runs a function on it. Here the variable is the result of splitting the depth and month
#in two. The function basically prints the first column,the depth, of the two. 
depth<-sapply(strsplit(as.character(NMDS.bray_pts$Sample), ' '), FUN=function(x){x[1]})
#This lick converts the depths, which are currently strings because of the letter, to integers (to do some math with)
#the input is the result of substituting all m's with a blank, sorta like what we do with sed.
depth<-as.integer(gsub(pattern='m', replacement='', x=depth))
#repeated for the months.
month<-sapply(strsplit(as.character(NMDS.bray_pts$Sample), ' '), FUN=function(x){x[2]})
#here we conduct an anova comparing the points along the axis and the depths and months.
#for some reason we get different results when looking at the month and depth simultaneously.
anova(lm(NMDS.bray_pts$MDS1~month*depth))
## Analysis of Variance Table
## 
## Response: NMDS.bray_pts$MDS1
##             Df Sum Sq Mean Sq F value             Pr(>F)    
## month        3    0.2     0.1    0.18               0.91    
## depth        1   42.4    42.4  148.24 0.0000000000000073 ***
## month:depth  3    0.1     0.0    0.13               0.94    
## Residuals   39   11.2     0.3                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Below examples illustrate that according to MDS1, depth expains all of the variation. 
#ANOVA on MDS1
anova(lm(NMDS.bray_pts$MDS1~month))
## Analysis of Variance Table
## 
## Response: NMDS.bray_pts$MDS1
##           Df Sum Sq Mean Sq F value Pr(>F)
## month      3    0.2   0.051    0.04   0.99
## Residuals 43   53.7   1.249
anova(lm(NMDS.bray_pts$MDS1~depth))
## Analysis of Variance Table
## 
## Response: NMDS.bray_pts$MDS1
##           Df Sum Sq Mean Sq F value              Pr(>F)    
## depth      1   42.4    42.4     167 <0.0000000000000002 ***
## Residuals 45   11.5     0.3                                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Repeat for MDS2
anova(lm(NMDS.bray_pts$MDS2~month))
## Analysis of Variance Table
## 
## Response: NMDS.bray_pts$MDS2
##           Df Sum Sq Mean Sq F value Pr(>F)
## month      3   0.38   0.126    0.69   0.56
## Residuals 43   7.82   0.182
anova(lm(NMDS.bray_pts$MDS2~depth))
## Analysis of Variance Table
## 
## Response: NMDS.bray_pts$MDS2
##           Df Sum Sq Mean Sq F value Pr(>F)  
## depth      1    1.1   1.096    6.95  0.011 *
## Residuals 45    7.1   0.158                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#######Figure 1: Multi-Seasonal, Low resolution/ major protistan group taxa barplot
DBin_nometa = DBin[DBin$Taxa != "Opisthokont-Metazoa",] #Remove metazoan ASVs (optional)
data.m = melt(DBin_nometa)
## Using ASV.ID, Taxonomy, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9, Level10, Level11, Level12, Taxa as id variables
data.agg<-aggregate(data.m$value, by=list(Taxa=data.m$Taxa,Samples=data.m$variable),sum) #sum sequences by taxonomic group
# save(data.agg, data.m, data_binned, file="Checkpoint1.Rdata")
# Make the order and colors
tax_order=c("Alveolates-Ciliates","Dinoflagellates-other","Alveolates-Dinophyceae","Alveolates-Syndiniales","Amoebozoa","Excavates","Rhizaria-other","Rhizaria-Radiolaria","Rhizaria-Cercozoa","Rhizaria-Foramnifera","Archaeplastids","Cryptophytes",    "Stramenopiles-Ochrophyta","Stramenopiles-Diatoms","Stramenopiles-Pelagophytes","Stramenopiles-Other","Stramenopiles-MAST",    "Hacrobia-other","Hacrobia-Haptophytes","Opisthokont-Choanoflagellida","Opisthokont-Fungi","Opisthokonts-Other","Other/unknown")
tax_color = c("#7f0000","#b30000","#e31a1c","#ec7014",   "#8c510a","#dfc27d",    "#08306b","#08519c","#6baed6","#7fcdbb",     "#e7298a","#ffff99",      "#e5f5e0","#a1d99b","#41ab5d","#006d2c","#00441b",      "#8c510a","#f6e8c3",   "#d8daeb","#b2abd2","#542788","#2d004b","#f7f7f7")

names(tax_color)<-tax_order
data.agg$Taxa<-factor(data.agg$Taxa, levels=rev(tax_order)) #factoring to keep the order

#Bar plot of community composition
Relative_abund = ggplot(data.agg[order(data.agg$Taxa),], aes(y=x,fill=tax,x=Samples))+
  geom_bar(position = "fill", stat = "identity", color="black",aes(fill=Taxa))+
  scale_fill_manual(values=tax_color)+
  labs(title="", x="",y="Relative abundance of reads")+
  theme_bw()+
  theme(legend.position="right",axis.text.x = element_text(angle=45, hjust=1,vjust=1,color="black"))
Relative_abund

################# Concatenate Depths into Three Depth Zones ####################
#Concatinate the seasons
taxa_info = DBin[,49:62]
ASV.ID = DBin$ASV.ID
m5 = apply(DBin[,c(2,14,25,37)],1,mean)
m25 = apply(DBin[,c(3,15,26,38)],1,mean)
m45 = apply(DBin[,c(4,27,39)],1,mean)
m75 = apply(DBin[,c(5,16,28,40)],1,mean)
m100 = apply(DBin[,c(6,17,29,41)],1,mean)
m125 = apply(DBin[,c(7,18,30,42)],1,mean)
m150 = apply(DBin[,c(8,19,31,43)],1,mean)
m175 = apply(DBin[,c(9,20,32,44)],1,mean)
m300 = apply(DBin[,c(10,21,33,45)],1,mean)
m400 = apply(DBin[,c(11,22,34,46)],1,mean)
m500 = apply(DBin[,c(12,23,35,47)],1,mean)
m770 = apply(DBin[,c(13,24,36,48)],1,mean)
Seasons_merged = data.frame(ASV.ID, m5,m25,m45,m75,m100,m125,m150,m175,m300,m400,m500,m770,taxa_info)
#dim(Seasons_merged)
#head(Seasons_merged)
merged.colnames = c("5m","25m","45m","75m","100m","125m","150m","175m","300m","400m","500m","770m")

################### Figure 4a Hierarchial cluster combined data #################
###This is a check to see if the pattern holds.
Seasons_merged_numers = Seasons_merged[,2:13]
colnames(Seasons_merged_numers)=merged.colnames
Season_relabundance = decostand(Seasons_merged_numers, MARGIN = 2, method = "total")
Season_relabundance.t = t(Season_relabundance)

Zonescluster=hclust(dist(Season_relabundance.t), method="average")
Merged_cluster = plot(Zonescluster)

############## Figure 5a-j Individual Taxa profiles ############
##Surface Dwellers##


############## Zones and Individual depth & Shannon, inv-simpson (Alpha Diversity), and ASV count ############
#First isolate only numeric columns
#Zones
Sufeuph_DatCols = Seasons_merged[,2:5]
SubSufeuph_DatCols = Seasons_merged[,6:9]
Subeuph_DatCols = Seasons_merged[,10:13]
#Individual
All_DataCols = Seasons_merged[,2:13]

#renaming the columns to have numbers only for conveinent coding.
colnames(Sufeuph_DatCols)=c("1","2","3","4")
colnames(SubSufeuph_DatCols)=c("1","2","3","4")
colnames(Subeuph_DatCols)=c("1","2","3","4")
colnames(All_DataCols)=c("5","25","45","75","100","125","150","175","300","400","500","770")

#Calculate Shannon diversity indicees
Sufeuph_Shannon = diversity(Sufeuph_DatCols[1:4], index = "shannon",2)
SubSufeuph_Shannon = diversity(SubSufeuph_DatCols[1:4], index = "shannon",2)
Subeuph_Shannon = diversity(Subeuph_DatCols[1:4], index = "shannon",2)
All_Shannon = diversity(All_DataCols[1:12], index = "shannon",2)
Shannon_bound = data.frame(Sufeuph_Shannon,SubSufeuph_Shannon,Subeuph_Shannon)
ShBd.M = melt(Shannon_bound)
## No id variables; using all as measure variables
#ggplot(ShBd.M, aes(x=variable, y=value, colour = variable)) + geom_violin() + geom_boxplot(width = .1, colour = "black", outlier.colour = "black")
ggplot(ShBd.M, aes(x=variable, y=value, fill = variable)) + geom_boxplot(width = .3)

#Repeat with inv simpson (Figure 2b)
Sufeuph_Sim = diversity(Sufeuph_DatCols[1:4], index = "invsimpson",2)
SubSufeuph_Sim = diversity(SubSufeuph_DatCols[1:4], index = "invsimpson",2)
Subeuph_Sim = diversity(Subeuph_DatCols[1:4], index = "invsimpson",2)
All_Sim = diversity(All_DataCols[1:12], index = "invsimpson",2)
Sim_bound = data.frame(Sufeuph_Sim,SubSufeuph_Sim,Subeuph_Sim)
SiBd.M = melt(Sim_bound)
## No id variables; using all as measure variables
ggplot(SiBd.M, aes(x=variable, y=value, fill = variable)) + geom_boxplot(width = .3)

############## Figure 2c Calculate ASV total counts #################
ASVCount.S <- colSums(Sufeuph_DatCols>0)
ASVCount.M <- colSums(SubSufeuph_DatCols>0)
ASVCount.D <- colSums(Subeuph_DatCols>0)
All_ASVCount <- colSums(All_DataCols>0)
ASVCount_DF = data.frame(ASVCount.S,ASVCount.M,ASVCount.D)
ASVC.Melted = melt(ASVCount_DF)
## No id variables; using all as measure variables
#boxplot it
ggplot(ASVC.Melted, aes(x=variable, y=value, fill = variable)) + geom_boxplot(width = .3)

#violin plot it
#ggplot(ASVC.Melted, aes(x=variable, y=value, colour = variable)) + geom_violin() + geom_boxplot(width = .1, colour = "black", outlier.colour = "black")
######## Finally plot all discrete depths alpha diversity
#All depths melted plot
All_Datafram = data.frame(All_Shannon,All_Sim,All_ASVCount)
All_Datafram$Depth = c("5","25","45","75","100","125","150","175","300","400","500","770")
All_Datafram$Depth = factor(All_Datafram$Depth, levels = All_Datafram$Depth)
All_Datafram.M = melt(All_Datafram)
## Using Depth as id variables
#head(All_Datafram.M)
ggplot(All_Datafram.M, aes(x=Depth, y=value, fill=variable, shape=variable))+geom_point(size=3, aes(color=variable))+facet_grid(variable~.,scales="free")+theme_bw()+theme(axis.text.x = element_text(angle = 90,hjust=1,vjust=0.5,size=8),axis.text.y=element_text(size=12),legend.position = "top")

#write.csv(Seasons_merged, file = "Seasons_merged.csv") #curate with krona excel - https://github.com/marbl/Krona/wiki

############## BALOON PLOTS #### 
###### Figure 6 Syndiniales ASV distribution

Syndiniales_ASV_table = Seasons_merged[Seasons_merged$Level4 == "Syndiniales",]
ASV_totals = apply(Syndiniales_ASV_table[2:13],1,sum)
Abundant_ASVs = ASV_totals[ASV_totals>15000]

sequence.variant1 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "407ae8e50f14b28b4ee9d2a916027f36",2:13],2,sum)
sequence.variant2 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "76079a7c02538b445611d90603dcdd84",2:13],2,sum)
sequence.variant3 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "41b1fc47277b0f91d2057475fd198356",2:13],2,sum)
sequence.variant4 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "0710f08b3fd6a82916211d4b4ac6c520",2:13],2,sum)
sequence.variant5 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "68b1207a2a740016122faaa91c530cd5",2:13],2,sum)
sequence.variant6 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "e269879576d49f0de9fa24a496e8c480",2:13],2,sum)
sequence.variant7 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "fad8d44de4b6cb7bf9307c2106c2f4f0",2:13],2,sum)
sequence.variant8 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "0146dde243b672179d69288c4bc367b4",2:13],2,sum)
sequence.variant9 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "73af8b0a55f3a6bbf16518ac8655f63a",2:13],2,sum)
sequence.variant10 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "4f9ba8b4deacee7453670af0438970f1",2:13],2,sum)
sequence.variant11 = apply(Syndiniales_ASV_table[Syndiniales_ASV_table$ASV.ID == "dcfecdeedead18507c1eeec9ef4f50f9",2:13],2,sum)

Synd_DF = data.frame(sequence.variant1,sequence.variant2,sequence.variant3,sequence.variant4,sequence.variant5,sequence.variant6,sequence.variant7,sequence.variant8,sequence.variant9,sequence.variant10,sequence.variant11)

Syndf = cbind(rownames(Synd_DF), Synd_DF) # add the sample column to the dataset
names(Syndf)[1] = "sample"
Syndf$sample=factor(Syndf$sample, levels = Syndf$sample) #Cool trick to keep the order of samples in the plot.
Syndf.m = melt(Syndf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Synd_baloon = ggplot(Syndf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=12) + theme_bw() + theme(axis.text.y = element_text(angle = 45)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Synd_baloon

#### Figure S3b diatom.diversity

Diatom.Seasons_merged = Seasons_merged[Seasons_merged$Level4 == "Bacillariophyta",]
diatomlist = Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 %in% unique(Diatom.Seasons_merged$Level7),]
unique(diatomlist$Level7) #The diatom species 
##  [1] "Pseudo-nitzschia"             "XXX"                         
##  [3] "Chaetoceros"                  "Thalassiosira"               
##  [5] "Polar-centric-Mediophyceae_X" "Guinardia"                   
##  [7] "Raphid-pennate_X"             "Cylindrotheca"               
##  [9] "Pleurosigma"                  "Nitzschia"                   
## [11] "Asteromphalus"                "Minutocellus"                
## [13] "Hemiaulus"                    "Actinocyclus"                
## [15] "Stellarima"                   "Navicula"                    
## [17] "Corethron"                    "Cerataulina"
nrow(Diatom.Seasons_merged) #Number of diatom ASVs
## [1] 322
#Sum the number of reads for each diatom species
Pnitz = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Pseudo-nitzschia",2:13],2,sum)
Chaetoceros = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Chaetoceros",2:13],2,sum)
Mediophyceae = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Mediophyceae",2:13],2,sum)
pennate = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Raphid-pennate",2:13],2,sum)
Pleurosigma = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Pleurosigma",2:13],2,sum)
Hemiaulus = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Hemiaulus",2:13],2,sum)
Stellarima = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Stellarima",2:13],2,sum)
Corethron = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Corethron",2:13],2,sum)
Thalassiosira = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Thalassiosira",2:13],2,sum)
Guinardia = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Guinardia",2:13],2,sum)
Cylindrotheca = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Cylindrotheca",2:13],2,sum)
Nitzschia = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Nitzschia",2:13],2,sum)
Minutocellus = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Minutocellus",2:13],2,sum)
Actinocyclus = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Actinocyclus",2:13],2,sum)
Navicula = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Navicula",2:13],2,sum)
Cerataulina = apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "Cerataulina",2:13],2,sum)
unknown =  apply(Diatom.Seasons_merged[Diatom.Seasons_merged$Level7 == "XXX",2:13],2,sum)
Diatom.dataframe = data.frame(unknown,Pnitz,Chaetoceros,Mediophyceae,pennate,Pleurosigma,Hemiaulus,Stellarima,Corethron,Thalassiosira,Guinardia,Cylindrotheca,Nitzschia,Minutocellus,Actinocyclus,Navicula,Cerataulina)
Diatdf = cbind(rownames(Diatom.dataframe), Diatom.dataframe) # add the sample column to the dataset
names(Diatdf)[1] = "sample"
Diatdf$sample=factor(Diatdf$sample, levels = Diatdf$sample) #Cool trick to keep the order of samples in the plot.
Diatdf.m = melt(Diatdf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Diatoms_baloon = ggplot(Diatdf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=10) + theme_bw() + theme(axis.text.y = element_text(angle = 45)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="red"))
Diatoms_baloon

### Figure S3a Dinophyceae 
Dinophyceae.Seasons_merged = Seasons_merged[Seasons_merged$Level4 == "Dinophyceae",]
unique(Dinophyceae.Seasons_merged$Level7) #dinoflagellate species
##  [1] "Dinophyceae_XXX"  "XXX"              "Prorocentrum"     "Protoperidinium" 
##  [5] "Blastodinium"     "Gyrodinium"       "Gymnodinium"      "Cochlodinium"    
##  [9] "Heterocapsa"      "Alexandrium"      "Karlodinium"      "Phalacroma"      
## [13] "Takayama"         "Noctiluca"        "Kofoidinium"      "Warnowia"        
## [17] "Woloszynskia"     "Gonyaulax"        "Lepidodinium"     "Neoceratium"     
## [21] "Lingulodinium"    "Abedinium"        "Suessiales_XX"    "Protoceratium"   
## [25] "Archaeperidinium" "Symbiodinium"     "Pyrodinium"
nrow(Dinophyceae.Seasons_merged) #number of dinoflagellate ASVs
## [1] 1449
#Sum species
Unk.dino = (apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "XXX",2:13],2,sum) + apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Dinophyceae_XXX",2:13],2,sum))
Blastodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Blastodinium",2:13],2,sum)
Heterocapsa = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Heterocapsa",2:13],2,sum)
Takayama = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Takayama",2:13],2,sum)
Woloszynskia = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Woloszynskia",2:13],2,sum)
Lingulodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Lingulodinium",2:13],2,sum)
Archaeperidinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Archaeperidinium",2:13],2,sum)
Gyrodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Gyrodinium",2:13],2,sum)
Alexandrium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Alexandrium",2:13],2,sum)
Noctiluca = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Noctiluca",2:13],2,sum)
Gonyaulax = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Gonyaulax",2:13],2,sum)
Abedinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Abedinium",2:13],2,sum)
Symbiodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Symbiodinium",2:13],2,sum)
Prorocentrum = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Prorocentrum",2:13],2,sum)
Gymnodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Gymnodinium",2:13],2,sum)
Kofoidinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Kofoidinium",2:13],2,sum)
Lepidodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Lepidodinium",2:13],2,sum)
Suessiales = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Suessiales_XX",2:13],2,sum)
Pyrodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Pyrodinium",2:13],2,sum)
Protoperidinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Protoperidinium",2:13],2,sum)
Cochlodinium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Cochlodinium",2:13],2,sum)
Phalacroma = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Phalacroma",2:13],2,sum)
Warnowia = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Warnowia",2:13],2,sum)
Neoceratium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Neoceratium",2:13],2,sum)
Protoceratium = apply(Dinophyceae.Seasons_merged[Dinophyceae.Seasons_merged$Level7 == "Protoceratium",2:13],2,sum)
Dinophyceae.dataframe = data.frame(Unk.dino,Blastodinium,Heterocapsa,Takayama,Woloszynskia,Lingulodinium,Archaeperidinium,Gyrodinium,Alexandrium,Noctiluca,Gonyaulax,Abedinium,Symbiodinium,Prorocentrum,Gymnodinium,Kofoidinium,Lepidodinium,Suessiales,Pyrodinium,Protoperidinium,Cochlodinium,Phalacroma,Warnowia,Neoceratium,Protoceratium,Diatom.dataframe)
Dinodf = cbind(rownames(Dinophyceae.dataframe), Dinophyceae.dataframe) # add the sample column to the dataset
names(Dinodf)[1] = "sample"
Dinodf$sample=factor(Dinodf$sample, levels = Dinodf$sample) #Cool trick to keep the order of samples in the plot.
Dinodf.m = melt(Dinodf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Dinophyceae_baloon = ggplot(Dinodf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=10) + theme_bw() + theme(axis.text.y = element_text(angle = 45)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="red"))
Dinophyceae_baloon

# Dinoflagellate read counts and proportions
Dinosum = sum(colSums(Dinophyceae.dataframe)) #Number of dinoflagellate reads
big3_readcount=sum(Dinophyceae.dataframe$Gyrodinium)+sum(Dinophyceae.dataframe$Prorocentrum)+sum(Dinophyceae.dataframe$Gymnodinium)
big3_readcount/Dinosum
## [1] 0.3185
##### Figure S2 Mast, Haptophyte, Radiolaria, Ciliate distributions
###### MAST distribution  #####
MAST.3 = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-3I",2:13],2,sum)
MAST.1C = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-1C",2:13],2,sum)
MAST.7 = apply(Seasons_merged[Seasons_merged$Level5 == "MAST-7",2:13],2,sum)
MAST.25_X = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-25_X",2:13],2,sum)
MAST.3K = apply(Seasons_merged[Seasons_merged$Level6 == "MAST-3K",2:13],2,sum)
Mast_DF = data.frame(MAST.3,MAST.1C,MAST.7,MAST.25_X,MAST.3K)
Mastdf = cbind(rownames(Mast_DF), Mast_DF) # add the sample column to the dataset
names(Mastdf)[1] = "sample"
Mastdf$sample=factor(Mastdf$sample, levels = Mastdf$sample) #Cool trick to keep the order of samples in the plot.
Mastdf.m = melt(Mastdf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Mastdf_baloon = ggplot(Mastdf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=12) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Mastdf_baloon

#####   Figure S2 Haptophytes    ########
Haptophytes.Seasons_merged = Seasons_merged[Seasons_merged$Level3 == "Haptophyta",]
unique(Haptophytes.Seasons_merged$Level5)
##  [1] "XXX"                      "Prymnesiales"            
##  [3] "Haptophyta_Clade_HAP3_X"  "Haptophyta_Clade_HAP4_X" 
##  [5] "Haptophyta_XX"            "Haptophyta_Clade_HAP2_X" 
##  [7] "Prymnesiophyceae_Clade_E" "Phaeocystales"           
##  [9] "Prymnesiophyceae_X"       "Prymnesiophyceae_Clade_D"
## [11] "Isochrysidales"           "Coccolithales"
unique(Haptophytes.Seasons_merged$Level6)
##  [1] "XXX"                        "Chrysochromulinaceae"      
##  [3] "Haptophyta_Clade_HAP3_XX"   "Haptophyta_Clade_HAP4_XX"  
##  [5] "Prymnesiophyceae_Clade_B3"  "Haptophyta_XXX"            
##  [7] "Prymnesiaceae"              "Haptophyta_Clade_HAP2_XX"  
##  [9] "Prymnesiophyceae_Clade_E_X" "Phaeocystaceae"            
## [11] "Braarudosphaeraceae"        "Prymnesiophyceae_Clade_D_X"
## [13] "Prymnesiophyceae_Clade_B4"  "Noelaerhabdaceae"          
## [15] "Chrysotilaceae"             "Prymnesiophyceae_XX"       
## [17] "Calyptrosphaeraceae"
nrow(Haptophytes.Seasons_merged) #haptophyte ASVs 423
## [1] 423
#Sum Species
Prymnesiales = apply(Seasons_merged[Seasons_merged$Level5 == "Prymnesiales",2:13],2,sum)
Phaeocystis = apply(Seasons_merged[Seasons_merged$Level5 == "Phaeocystales",2:13],2,sum)
Hapto.other = apply(Seasons_merged[Seasons_merged$Level3 == "Haptophyta",2:13],2,sum)
Hap4 = apply(Seasons_merged[Seasons_merged$Level5 == "Haptophyta_Clade_HAP4_X",2:13],2,sum)
PrymnClade_D = apply(Seasons_merged[Seasons_merged$Level5 == "Prymnesiophyceae_Clade_D",2:13],2,sum)
Cocco = apply(Seasons_merged[Seasons_merged$Level5 == "Coccolithales",2:13],2,sum)
Clade.B3 = apply(Seasons_merged[Seasons_merged$Level6 == "Prymnesiophyceae_Clade_B3",2:13],2,sum)
Clade.EX = apply(Seasons_merged[Seasons_merged$Level6 == "Prymnesiophyceae_Clade_E_X",2:13],2,sum)
Prymnesiaceae = apply(Seasons_merged[Seasons_merged$Level6 == "Prymnesiaceae",2:13],2,sum)
Chrysochromulinaceae = apply(Seasons_merged[Seasons_merged$Level6 == "Chrysochromulinaceae",2:13],2,sum)
Hapto_DF = data.frame(Hapto.other,Chrysochromulinaceae,Phaeocystis,Hap4,PrymnClade_D,Cocco,Clade.B3,Clade.EX,Prymnesiaceae)
Hapdf = cbind(rownames(Hapto_DF), Hapto_DF) # add the sample column to the dataset
names(Hapdf)[1] = "sample"
Hapdf$sample=factor(Hapdf$sample, levels = Hapdf$sample) #Cool trick to keep the order of samples in the plot.
Hapdf.m = melt(Hapdf) #reformat to long format for plotting
## Using sample as id variables
#Baloon plot it with ggplot
Hapto_baloon = ggplot(Hapdf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=15) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Hapto_baloon

###### Figure S2 Radiolaria ######
RadMerged_Table = Seasons_merged[Seasons_merged$Level3 == "Radiolaria",]
Radb_Merged_Table = RadMerged_Table[RadMerged_Table$Level4 == "RAD-B",]
Acanth_Merged_Table = RadMerged_Table[RadMerged_Table$Level4 == "Acantharea",]
length(unique(RadMerged_Table$ASV.ID))
## [1] 1269
uniques.radiolaria = unique(RadMerged_Table$Level4)
#Sum species reads
Rad.Other = apply(Seasons_merged[Seasons_merged$Level4 == "XXX",2:13],2,sum)
Rad.Other = Rad.Other + apply(Seasons_merged[Seasons_merged$Level4 == "Radiolaria_X",2:13],2,sum)
Acantharia = apply(Seasons_merged[Seasons_merged$Level4 == "Acantharea",2:13],2,sum)
RAD.A = apply(Seasons_merged[Seasons_merged$Level4 == "RAD-A",2:13],2,sum)
RAD.B = apply(Seasons_merged[Seasons_merged$Level4 == "RAD-B",2:13],2,sum)
RAD.C = apply(Seasons_merged[Seasons_merged$Level4 == "RAD-C",2:13],2,sum)
Polycyst = apply(Seasons_merged[Seasons_merged$Level4 == "Polycystinea",2:13],2,sum)
Radiolaria_DF = data.frame(Acantharia,RAD.A,RAD.B,RAD.C,Polycyst,Rad.Other)
Rdf = cbind(rownames(Radiolaria_DF), Radiolaria_DF)
names(Rdf)[1] = "sample"
Rdf$sample=factor(Rdf$sample, levels = Rdf$sample) #Cool trick to keep the order of samples in the plot.
Rdf.m = melt(Rdf)
## Using sample as id variables
#Baloon plot it
Rad_baloon = ggplot(Rdf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=15) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Rad_baloon

######## Figure S2 Ciliates ##
# Make a baloon plot from which we can extrapolate the distribution patterns of the following ciliate subclasses:
# Spirotrichea, Prostomatea, Colpodea, Litostomatea, Phyllopharyngea, Oligohymenophorea, and a group called Ciliate_others
CiliateMerged_Table = Seasons_merged[Seasons_merged$Level3 == "Ciliophora",]
#Sum number of reads
Spirotrich = apply(Seasons_merged[Seasons_merged$Level4 == "Spirotrichea",2:13],2,sum)
Phyllopharyngea = apply(Seasons_merged[Seasons_merged$Level4 == "Phyllopharyngea",2:13],2,sum)
Oligohym = apply(Seasons_merged[Seasons_merged$Level4 == "Oligohymenophorea",2:13],2,sum)
Litostome = apply(Seasons_merged[Seasons_merged$Level4 == "Litostomatea",2:13],2,sum)
Colpodea = apply(Seasons_merged[Seasons_merged$Level4 == "Colpodea",2:13],2,sum)
Prostomatea = apply(Seasons_merged[Seasons_merged$Level4 == "Prostomatea",2:13],2,sum)
Other.Ciliate = apply(Seasons_merged[Seasons_merged$Level4 == "XXX",2:13],2,sum)
Ciliate_DF = data.frame(Spirotrich,Phyllopharyngea,Oligohym,Litostome,Colpodea,Prostomatea,Other.Ciliate)
Cdf = cbind(rownames(Ciliate_DF), Ciliate_DF)
names(Cdf)[1] = "sample"
Cdf$sample=factor(Cdf$sample, levels = Cdf$sample) #Cool trick to keep the order of samples in the plot.
Cdf.m = melt(Cdf)
## Using sample as id variables
#Baloon plot it
Cili_baloon = ggplot(Cdf.m, aes(x=sample, y=variable)) +
  geom_point(aes(size=value, fill=factor(variable)), shape=21) +
  scale_size_area(max_size=15) + theme_bw() + theme(axis.text.y = element_text(angle = 90)) + theme(axis.text.x = element_text(angle=90, hjust=1,vjust=1,color="black"))
Cili_baloon

########### Figure 3 #####
#making upsetR plot
# 1. Read in an ASV table
Seasons_merged_Upset = Seasons_merged

# 2. Make binary columns using if else statement
Seasons_merged_Upset$binary_5m = ifelse(Seasons_merged_Upset$m5>10,1,0)
Seasons_merged_Upset$binary_25m = ifelse(Seasons_merged_Upset$m25>10,1,0)
Seasons_merged_Upset$binary_45m = ifelse(Seasons_merged_Upset$m45>10,1,0)
Seasons_merged_Upset$binary_75m = ifelse(Seasons_merged_Upset$m75>10,1,0)
Seasons_merged_Upset$binary_100m = ifelse(Seasons_merged_Upset$m100>10,1,0)
Seasons_merged_Upset$binary_125m = ifelse(Seasons_merged_Upset$m125>10,1,0)
Seasons_merged_Upset$binary_150m = ifelse(Seasons_merged_Upset$m150>10,1,0)
Seasons_merged_Upset$binary_175m = ifelse(Seasons_merged_Upset$m175>10,1,0)
Seasons_merged_Upset$binary_300m = ifelse(Seasons_merged_Upset$m300>10,1,0)
Seasons_merged_Upset$binary_400m = ifelse(Seasons_merged_Upset$m400>10,1,0)
Seasons_merged_Upset$binary_500m = ifelse(Seasons_merged_Upset$m500>10,1,0)
Seasons_merged_Upset$binary_770m = ifelse(Seasons_merged_Upset$m770>10,1,0)
#Plot using upsetR
upset(Seasons_merged_Upset, keep.order = TRUE, sets = c("binary_770m","binary_500m","binary_400m","binary_300m","binary_175m","binary_150m","binary_125m","binary_100m","binary_75m","binary_45m","binary_25m","binary_5m") , mainbar.y.label = "Number of ASVs", text.scale = 1.5)